import yfinance as yf
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot.pandas
import plotly.express as px
import panel as pn
import requests
import time
from bs4 import BeautifulSoup
pn.extension("tabulator")
pn.extension('plotly')
hv.extension('bokeh')
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from holoviews.plotting.links import RangeToolLink
from bokeh.models.widgets import AutocompleteInput
from datetime import datetime, timedelta
This notebook will create a stock data dashboard using panels, hvplot, and plotly. This dashboard will display various stock data metrics.
In the top center of the dashboard we will have the global filters, which will allow the user to enter a stock ticker symbol and to select a time period and interval for the stock data. Below these filters, in the center of the dashboard, we will have an OHLC chart that shows stock data, and we will overlay an LSTM machine-learning stock prediction on top of the graph. Below this top OHLC chart we will have a line chart displaying the stock close price, and below this a bar chart showing stock sale volume.
On the right hand side of the dashboard we will have three bar charts showing the stock markets top stock gainers, top losers, and most active stocks.
On the top left hand side of the dashboard we will have a table showing the stock options chain, and below that we will have a treemap/heatmap combination that will show the markets movers by stock market sector for the trading day.
Our dashboard will pull data using the yfinance python package, and will webscrape from yahoo finance website, and tradingview
We first need to create a textbox to input the desired stock ticker name. We will initialize an initial default ticker name before we create this.
# Set default values for when dashboard initializes
default_company_name = "SPY"
# Shared Ticker object; the text-input callback below rebinds this global
# whenever the user enters a new symbol.
global_ticker = yf.Ticker(default_company_name)
ask_price = global_ticker.info["ask"]
bid_price = global_ticker.info["bid"]
day_low = global_ticker.info["dayLow"]
day_high = global_ticker.info["dayHigh"]
company_name = global_ticker.info["longName"]
currency = global_ticker.info["currency"]
# NOTE(review): `company_name` is reassigned here from the longName string to
# the Markdown pane that displays it -- consider a distinct name for the pane.
company_name = pn.pane.Markdown(
    f"# <u>Selected Company/Index:</u> \t {default_company_name} (Default) \n" +
    f"# <u>Selected Company Name:</u> {company_name} \n" +
    f"# Ask: {ask_price} \t {currency}\t // Day Low \t {day_low} \t {currency}\n" +
    f"# Bid: {bid_price} \t {currency}\t // Day High \t {day_high} \t {currency} "
)
# Create the input pane
text_input = pn.widgets.TextInput(name="Company Symbol", placeholder="Enter a Company or Index abbreviation here...")
text_input
Now we need to ensure that this will update after the user hits enter. To do this we need to use panels link functionality. This will update the markdown cell above our text input cell after the company name has been entered
# Updates the markdown pane when a new ticker symbol is entered
def callback(target, event):
    """Refresh the header Markdown pane for a newly entered ticker symbol.

    Parameters
    ----------
    target : pn.pane.Markdown
        The Markdown pane this callback is linked to via ``text_input.link``.
    event : param event
        Widget change event; ``event.new`` holds the entered symbol text.

    Side effect: rebinds the module-level ``global_ticker`` so subsequent
    data fetches elsewhere on the dashboard use the newly selected company.
    """
    global global_ticker
    company_symbol = event.new.upper()
    global_ticker = yf.Ticker(company_symbol)
    # NOTE(review): any of these keys may be absent from `.info` for some
    # symbols; a KeyError here would leave the pane un-updated -- confirm.
    ask_price = global_ticker.info["ask"]
    bid_price = global_ticker.info["bid"]
    day_low = global_ticker.info["dayLow"]
    day_high = global_ticker.info["dayHigh"]
    company_name = global_ticker.info["longName"]
    currency = global_ticker.info["currency"]
    target.object = (
        f"# <u>Selected Company/Index:</u> \t {company_symbol} \n" +
        f"# <u>Selected Company Name:</u> {company_name} \n" +
        f"# Ask: {ask_price} \t {currency}\t // Day Low \t {day_low} \t {currency}\n" +
        f"# Bid: {bid_price} \t {currency}\t // Day High \t {day_high} \t {currency} "
    )
# Link our markdown and our input panes: entering a symbol fires `callback`,
# which rewrites the Markdown header in place.
text_input.link(company_name, callbacks={'value': callback})
# Create panel combining the two
company_input_pane = pn.Column(company_name, text_input)
company_input_pane.servable()
Our global filter is almost complete. We now need to be able to filter for a select time period and interval to determine what stock data we want to look at, and our global filter will be complete. I want to be able to adjust the dashboard by time period and time interval. The yf.Ticker() function allows these two options to be specified. Lets create two dropdowns that will later allow us to adjust our graphs. After looking at the options it appears that there are only smaller intervals for recent dates. For example we cannot get stock prices every 1 minute for the last ten years. To adjust for this we will make the two options interact with each other to ensure that only available options are shown to the user in our dashboard:
# The mapping of time periods to compatible intervals. yfinance only serves
# fine-grained (intraday) intervals for recent periods, so each period lists
# only the intervals it supports.
time_period_to_intervals = {
    "1y": ["1d", "5d", "1wk", "1mo", "3mo"],  # Default dropdown
    "max": ["1d", "5d", "1wk", "1mo", "3mo"],
    "10y": ["1d", "5d", "1wk", "1mo", "3mo"],
    "5y": ["1d", "5d", "1wk", "1mo", "3mo"],
    "2y": ["1d", "5d", "1wk", "1mo", "3mo"],
    "6mo": ["1d", "5d", "1wk", "1mo"],
    "3mo": ["60m", "1h", "90m", "1d", "5d", "1wk", "1mo"],
    "1mo": ["2m", "5m", "15m", "30m", "60m", "1h", "90m", "1d", "5d", "1wk"],
    "5d": ["1m", "2m", "5m", "15m", "30m", "60m", "1h", "90m", "1d"],
    "1d": ["1m", "2m", "5m", "15m", "30m", "60m", "90m"]
}
# Create Drop-Down widgets; interval options are populated dynamically below
selected_time_period = pn.widgets.Select(name='Time Period', options=list(time_period_to_intervals.keys()))
selected_interval = pn.widgets.Select(name='Interval', options=[])
# Function to update interval options based on selected time period
def update_interval_options(time_period):
    """Replace the interval dropdown's options with those valid for ``time_period``."""
    interval_options = time_period_to_intervals.get(time_period, [])
    selected_interval.options = interval_options
    # Default to the first compatible interval (None when the period is unknown)
    selected_interval.value = interval_options[0] if interval_options else None
# Watch for changes in the time period selection
selected_time_period.param.watch(lambda event: update_interval_options(event.new), 'value')
# Initialize interval options based on the default time period selection
update_interval_options(selected_time_period.value)
button_layout_pane = pn.Row(selected_time_period, selected_interval)
button_layout_pane.servable()
In the above code the param.watch will check for any updates in the time period drop down, and will then update the interval options. These are complete so now all we need to do is combine them into a panel widgetbox, and we will then be able to add it to our dashboard. Lastly we need to create our widget box that will go above our center plots, and control everything on the dashboard.
# Global filter box: company input plus period/interval dropdowns; shown above the center plots
center_widget_box = pn.WidgetBox(company_input_pane, button_layout_pane, align="center")
center_widget_box.servable()
Now we will build our predictive machine learning algorithm so that we can tie it in with our function to pull the stock data. We will then be able to link the function to get stock data with the filters we just made, and have it train the model after it pulls the updated data. Lets build the model with a test ticker dataset.
# ticker = yf.Ticker("NVDA")
# nvda = ticker.history(period="1y", interval="1d")
# nvda.head()
# nvda = nvda.reset_index()
# nvda.info()
Dataframe looks good and is reading everything with the correct datatype. We will create a windowing function that will train the model up to a target date, and will use three days before the target date:
# def window_data(data, n=3):
# windowed_data = pd.DataFrame()
# for i in range(n, 0, -1):
# windowed_data[f'Target-{i}'] = data['Close'].shift(i)
# windowed_data['Target'] = data['Close']
# windowed_data['Date'] = data['Date']
# return windowed_data.dropna()
# window_df = window_data(nvda)
# window_df.head()
# def window_df_to_date_X_Y(windowed_df):
# numpy_df = windowed_df.to_numpy() # Convert to numpy for speed in TensorFlow
# dates = numpy_df[:, -1] # Get date
# left_matrix = numpy_df[:,0:-2] # Get target 3-1
# X = left_matrix.reshape((len(dates), left_matrix.shape[1], 1))
# Y = numpy_df[:,-2]
# return dates, X.astype(np.float32), Y.astype(np.float32)
# dates, X, y = window_df_to_date_X_Y(window_df)
# dates.shape, X.shape, y.shape
Now lets do train, test, validate for our model.
# q80 = int(len(dates) * 0.8) # Get 80% of dates for training
# q90 = int(len(dates) * 0.9) # Will use for 10% to validate
# dates_train, X_train, y_train = dates[:q80], X[:q80], y[:q80]
# dates_val, X_val, y_val = dates[q80:q90], X[q80:q90], y[q80:q90]
# dates_test, X_test, y_test = dates[q90:], X[q90:], y[q90:]
# model = Sequential([layers.Input((3,1)),
# layers.LSTM(64), # Number of neurons. May need to lower depending on dash
# layers.Dense(32, activation="relu"), # Layers
# layers.Dense(32, activation="relu"),
# layers.Dense(1)])
# model.compile(loss="mse", optimizer=Adam(learning_rate=0.001),
# metrics=["mean_absolute_error"])
# model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100) # 100 runs through the dataset
Lets visualize
# train_predictions = model.predict(X_train).flatten()
# test_predictions = model.predict(X_test).flatten()
# val_predictions = model.predict(X_val).flatten()
# train_predictions_df = pd.DataFrame({'Date': dates_train, 'Value': train_predictions})
# test_predictions_df = pd.DataFrame({'Date': dates_test, 'Value': test_predictions})
# training_df = pd.DataFrame({'Date': dates_train, 'Value': y_train})
# val_predictions_df = pd.DataFrame({'Date': dates_val, 'Value': val_predictions})
# p1 = train_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Train', color="darkorange")
# p2 = test_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Test', color="blue")
# p3 = val_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Validation', color="red")
# p4 = nvda.hvplot.line(x='Date', y='Close', label='Actual', color="green")
# plot = (p1 * p2 * p3 * p4)
# plot.opts(legend_position="top_left", title="LSTM Model Performance")
# plot
This looks good, and now we need to build our function to train on our data. Differently we will need to add a function to predict out past the last known date.
def lstm_prediction(df, variable):
    """Train a small LSTM on one column of ``df`` and return its predictions.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``Datetime`` column plus the target column. Note the
        frame is mutated in place by the forward-fill below.
    variable : str
        Name of the column to model (e.g. ``"Close"``).

    Returns
    -------
    pd.DataFrame
        Columns ``Datetime`` and ``variable`` holding in-sample predictions
        for the train/validation/test windows plus a short out-of-sample
        forecast (``future_step`` points past the last known date).
    """
    number_lookback = 6  # The number of past points to predict future points, more is slower

    def window_data(data, n=3):
        # Build a supervised-learning frame: lag columns Target-n ... Target-1,
        # then the value to predict and its timestamp.
        windowed_data = pd.DataFrame()
        for i in range(n, 0, -1):
            windowed_data[f'Target-{i}'] = data[variable].shift(i)
        windowed_data['Target'] = data[variable]
        windowed_data['Datetime'] = data['Datetime']
        return windowed_data.dropna()

    def window_df_to_date_X_Y(windowed_df):
        numpy_df = windowed_df.to_numpy()  # Convert to numpy for speed in TensorFlow
        dates = numpy_df[:, -1]  # Last column holds the timestamps
        left_matrix = numpy_df[:, 0:-2]  # The lag columns Target-n..Target-1
        X = left_matrix.reshape((len(dates), left_matrix.shape[1], 1))
        Y = numpy_df[:, -2]  # The 'Target' column
        return dates, X.astype(np.float32), Y.astype(np.float32)  # Shapes (248,) (248, 6, 1) (248,)

    df.ffill(inplace=True)  # Handle null
    window_df = window_data(df, number_lookback)
    dates, X, y = window_df_to_date_X_Y(window_df)
    # print(dates.shape, X.shape, y.shape)
    # Scale the target to [0, 1].
    # NOTE(review): X is left unscaled here, so the model maps raw prices to
    # scaled targets -- confirm this asymmetry is intentional.
    scaler = MinMaxScaler(feature_range=(0, 1))
    y = y.reshape(-1, 1)
    scaler = scaler.fit(y)
    y = scaler.transform(y)
    y = y.reshape(-1,)
    # print(y.shape)
    q80 = int(len(dates) * 0.8)  # Get 80% of dates for training
    q90 = int(len(dates) * 0.9)  # Will use for 10% to validate
    # Get sequential training, validation, testing data
    dates_train, X_train, y_train = dates[:q80], X[:q80], y[:q80]
    dates_val, X_val, y_val = dates[q80:q90], X[q80:q90], y[q80:q90]
    dates_test, X_test, y_test = dates[q90:], X[q90:], y[q90:]
    # For small datasets this fixes the issue where these return null
    min_validation_size = 3  # Minimum number of data points for validation set
    # Check if the validation set is too small and adjust if necessary
    if (len(dates_val) < min_validation_size) or (len(X_val) < min_validation_size) or (len(y_val) < min_validation_size):
        dates_val = dates[-min_validation_size:-1]
        X_val = X[-min_validation_size:-1]
        y_val = y[-min_validation_size:-1]
    model = Sequential([layers.Input((number_lookback, 1)),
                        layers.LSTM(64),  # Number of neurons. May need to lower depending on dash
                        layers.Dense(32, activation="relu"),  # Layers, relu helps solve vanishing gradient issue
                        layers.Dense(32, activation="relu"),
                        layers.Dense(1)]
                       )
    model.compile(loss="mse",
                  optimizer=Adam(learning_rate=0.001),
                  metrics=["mean_absolute_error"]
                  )
    model.fit(X_train,
              y_train,
              validation_data=(X_val, y_val),
              epochs=50,  # 50 runs through the dataset
              verbose=0  # Limits text while training model
              )
    future_step = 3  # How many steps past the last known date to forecast

    def predict_future(model, last_sequence, steps=15):
        # Roll the model forward: each prediction is appended to the running
        # sequence and fed back in to predict the next step.
        future_predictions = []
        current_sequence = last_sequence.copy()
        for _ in range(steps):
            current_sequence_reshaped = np.array(current_sequence[-number_lookback:]).reshape((1, number_lookback, 1))
            next_step_prediction = model.predict(current_sequence_reshaped, verbose=0)[0, 0]
            future_predictions.append(next_step_prediction)
            current_sequence.append(next_step_prediction)
        return np.array(future_predictions)

    # Prepare the last observed values
    last_sequence = X_test[-1].flatten().tolist()
    # Predict the next 3 steps from the last 6 steps
    predicted_future = predict_future(model, last_sequence, steps=future_step)
    # Transform predictions back to the original scale
    predicted_future_reshaped = predicted_future.reshape(-1, 1)
    predicted_future_original_scale = scaler.inverse_transform(predicted_future_reshaped)
    predicted_future_original_scale = predicted_future_original_scale.flatten()
    # Get the future dates we predicted to plot.
    # NOTE(review): future dates step by whole days even when the selected
    # interval is intraday -- confirm this is acceptable for intraday plots.
    last_date = pd.to_datetime(dates[-1])
    future_dates = [last_date + timedelta(days=i) for i in range(1, future_step + 1)]
    # Lets see what our predictions are over the whole time period and plot
    train_pred = model.predict(X_train, verbose=0)
    test_pred = model.predict(X_test, verbose=0)
    val_pred = model.predict(X_val, verbose=0)
    # Predictions will be normalized so we need to get actual value
    train_predictions = scaler.inverse_transform(train_pred)
    test_predictions = scaler.inverse_transform(test_pred)
    val_predictions = scaler.inverse_transform(val_pred)
    train_predictions_df = pd.DataFrame({"Datetime": dates_train, variable: train_predictions.flatten()})
    test_predictions_df = pd.DataFrame({"Datetime": dates_test, variable: test_predictions.flatten()})
    val_predictions_df = pd.DataFrame({"Datetime": dates_val, variable: val_predictions.flatten()})
    predictions_df = pd.DataFrame({"Datetime": future_dates, variable: predicted_future_original_scale})  # Future predictions
    all_predictions_df = pd.concat([train_predictions_df, test_predictions_df, val_predictions_df, predictions_df], ignore_index=True)
    # all_predictions_df = all_predictions_df.sort_values(by='Datetime')
    # p1 = train_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Train', color="darkorange")
    # p2 = test_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Test', color="blue")
    # p3 = val_predictions_df.hvplot.line(x='Date', y='Value', label='Predictions-Validation', color="red")
    # p4 = predictions_df.hvplot.line(x='Datetime', y='Value', label='Predictions', color="darkorange")
    # plot = (p1 * p2 * p3 * p4)
    return all_predictions_df
# ticker = yf.Ticker("MSFT")
# df = ticker.history(period="1y", interval="1d")
# df = df.reset_index()
# df = df.rename(columns={"Date":"Datetime"})
# pd.set_option('display.max_rows', 500)
# temp_df = lstm_prediction(df, "Close")
# display(temp_df)
Well that was much more painful than I expected, but it looks like we can get a prediction that converges a short range out into the future. I will suppress the output for the model with verbose=0, but we are getting decent accuracy here with a relatively low MSE. Further hyperparameter tuning may be needed, as performance varies depending on the size of the dataset. It seems to perform better for more recent periods like one year or six months.
Now we will use the filter values to control how our data is pulled so that our visualizations are interactive, and interactively update. We will build a function that pulls data, and then trains our model. We will add our model predictions to our stock dataframe to plot later.
def get_stock_data(company, time_period, interval):
    """Fetch price history for ``company`` and append LSTM close-price predictions.

    Parameters
    ----------
    company : str
        Ticker symbol from the text input; blank falls back to the default.
    time_period, interval : str
        Passed straight through to ``yfinance`` ``Ticker.history``.

    Returns
    -------
    pd.DataFrame
        History outer-merged with the prediction frame on ``Datetime`` (so
        predicted-only future rows are kept); empty DataFrame on any failure.
    """
    # When dashboard first opens it will set to default, then the input value
    if company == "":
        company = default_company_name  # Default is global String "SPY"
    else:
        company = company.upper()
    try:
        # BUG FIX: fetch via a Ticker for the requested symbol instead of the
        # module-level ``global_ticker``. Previously ``company`` was computed
        # above but never used, so this call silently depended on the input
        # callback having already rebound the global.
        ticker = yf.Ticker(company)
        temp_df = ticker.history(period=time_period, interval=interval)
        # Error handling
        if temp_df.empty:
            raise ValueError(f"No data available for {company} for the selected time period {time_period} and interval: {interval}")
        temp_df.index.name = "Datetime"
        # Transform dataframe and make interactive
        stock_df = temp_df.reset_index()
        stock_df["Datetime"] = pd.to_datetime(stock_df["Datetime"])
        prediction_df = lstm_prediction(stock_df, "Close")  # LSTM Prediction
        merged_df = pd.merge(stock_df, prediction_df, suffixes=('', '_Predicted'), on="Datetime", how="outer")
        return merged_df
    except Exception as e:
        print(f"An error occurred while fetching the stock data: {e}")
        return pd.DataFrame()
Now lets bind the get_stock_data to our widgets so that they control what data is fetched. This will update our data as we modify our widget options.
# pn.bind re-runs get_stock_data whenever any of the three filter widgets change
bound_get_stock_data = pn.bind(get_stock_data, company=text_input, time_period=selected_time_period, interval=selected_interval)
Now the dataset will update as our filters are trained.
We now build our line chart of our predictive model overlain with our OHLC candlestick chart.
def plot_center(stock_df):
    """Build the dashboard's center column: volume, OHLC+prediction, and overview.

    Parameters
    ----------
    stock_df : pd.DataFrame
        Merged history/prediction frame from ``get_stock_data``; must contain
        ``Datetime``, OHLC columns, ``Volume`` and ``Close_Predicted``.

    Returns
    -------
    A 3-row holoviews layout, or a Markdown pane when ``stock_df`` is empty.
    """
    # Check if the Stock Dataframe is empty or available
    if stock_df.empty:
        return pn.pane.Markdown("No data available for the selected parameters.")
    last_updated = datetime.now()
    formatted_dt = last_updated.strftime('%Y-%m-%d %H:%M:%S')
    # Get the three plots we want on our dashboard
    ohlc = stock_df.hvplot.ohlc(x="Datetime",
                                ylabel='Price ($)',
                                height=400,
                                grid=True,
                                xaxis=None,
                                title=f"OHLC Plot",
                                bar_width=0.9,
                                line_color="gray"
                                )
    # Smaller OHLC strip used as the range-selection overview below the main chart
    overview = stock_df.hvplot.ohlc(xaxis=True,
                                    ylabel="Close ($)",
                                    height=200,
                                    color="blue",
                                    grid=True,
                                    title="Line Chart",
                                    legend=True
                                    )
    model_predictions = stock_df.hvplot.line(x="Datetime",
                                             y="Close_Predicted",
                                             xaxis=True,
                                             ylabel="Close ($)",
                                             height=200,
                                             color="darkorange",
                                             alpha=0.5,  # Transparency
                                             grid=True,
                                             title="Line Chart",
                                             legend=True
                                             )
    ohlc = ohlc * model_predictions  # Combine the plots overtop of one another
    volume = stock_df.hvplot.step(x='Datetime',
                                  y='Volume',
                                  xaxis=None,
                                  height=200,
                                  title=f"Volume (Updated {formatted_dt})",
                                  grid=True,
                                  legend=True,
                                  )
    # Link the overview strip's range tool to the main OHLC chart
    RangeToolLink(overview.get(0), ohlc.get(0))
    # Merge the plots and return the merged plot
    center_plot = (volume + ohlc + overview).cols(1)
    center_plot.opts(merge_tools=True,
                     toolbar="below",
                     legend_position='right',
                     sync_legends=True
                     )
    return center_plot
Now we need to make this dynamically update based on our widget pane. To accomplish this we can use Panels Bind function to sync the two panes.
# Define a function that will link our get data and plot functions
def reactive_plot_center(company, time_period, interval):
    """Re-plot the center column; the parameters only serve as bind triggers.

    The widget values are read inside ``bound_get_stock_data`` itself, so the
    arguments here exist solely to make ``pn.bind`` re-invoke this function
    when any of the three widgets change.
    """
    stock_df = bound_get_stock_data()  # This fetches the data based on current widget values
    return plot_center(stock_df)  # Call plot_center with the fetched data
# pn.bind creates a dynamic link between the widgets and the function
reactive_plot = pn.bind(reactive_plot_center, company=text_input, time_period=selected_time_period, interval=selected_interval)
# Create a Panel layout with the widgets and the reactive plot
center_panel = pn.Column(center_widget_box, reactive_plot, align="center")
center_panel.servable()
Unfortunately I initially built this program utilizing an Alpha Vantage API, but unfortunately they lowered their free API daily limit from 500 to 25 calls per day. As a result we will use beautiful soup to web scrape the data we need for these bar charts. In our mockup we determined we would like to plot the top volume stocks, top gaining stocks, and top losing stocks. Lets start with our scraping algorithm
After looking at the website, all three pages have the same layout, so we can create a single function to scrape this data. Their robots.txt does not list these web pages as disallowed, so we will not be violating their terms of service. Let's define a function to scrape this data:
def scrape_yahoo(page):
    """Scrape a Yahoo Finance screener table ("gainers", "losers", "most-active").

    Parameters
    ----------
    page : str
        Path segment of the Yahoo Finance page to scrape.

    Returns
    -------
    pd.DataFrame
        Columns: ticker, company_name, price, change, percent_change, volume,
        with the numeric columns converted to float.
    """
    # URL of Yahoo Finance page
    url = f"https://finance.yahoo.com/{page}/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find table that contains the stock data
    table = soup.find('table', {'class': 'W(100%)'})
    columns = ["ticker", "company_name", "price", "change", "percent_change", "volume"]
    df = pd.DataFrame(columns=columns)
    # Loop through the rows in the table and extract data.
    # BUG FIX: the row cells previously reused (shadowed) the `columns` name
    # list, so the ASCII-cleanup loop below iterated over <td> tags instead of
    # the column names. The cells now use a distinct name.
    for row in table.find_all('tr')[1:]:  # Skip header row
        cells = row.find_all('td')
        row_data = {
            "ticker": cells[0].text,  # Ticker symbol
            "company_name": cells[1].text,  # Company name
            "price": cells[2].text,  # Price
            "change": cells[3].text,  # Change
            "percent_change": cells[4].text,  # % Change
            "volume": cells[5].text  # Volume
        }
        new_row = pd.DataFrame(row_data, index=[0])
        df = pd.concat([df, new_row], ignore_index=True)
    # We must define the column types, and then can convert to their proper format
    df = df.astype({"ticker": str, "company_name": str, "price": str, "change": str, "percent_change": str, "volume": str})

    # We must remove all non ascii elements to ensure the data is good to convert to floats
    def remove_non_ascii(text):
        return text.encode('ascii', 'ignore').decode('ascii')

    for col in columns:
        df[col] = df[col].apply(remove_non_ascii)

    # Now we must convert abbreviated volume values (e.g. "1.2M") to float
    def convert_volume_to_float(value):
        multipliers = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3}
        if value and value[-1] in multipliers:  # guard empty strings
            number = float(value[:-1])  # Get number without symbol
            return number * multipliers[value[-1]]
        else:
            return float(value)

    df["volume"] = df["volume"].str.replace(",", "").apply(convert_volume_to_float)
    df["change"] = df["change"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    df["percent_change"] = df["percent_change"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    df["price"] = df["price"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    return df
Now we have defined our function, we can get our data. We will built in error handling to read from an old csv in case they change their website so that the dashboard will still display.
We will need to sort these so our plots show correctly, and lets also get the current time so we can see on our dashboard when the data was last scraped.
# Scrape each screener page, pausing between requests to avoid rate limiting;
# fall back to cached CSV snapshots if the request or parsing fails.
try:
    top_gainer_df = scrape_yahoo("gainers")
    top_gainer_df = top_gainer_df.sort_values("percent_change", ascending=True)
    time.sleep(5)
except Exception as e:
    print(f"Error occurred while scraping gainers: {e}")
    top_gainer_df = pd.read_csv("data/top_gainer.csv")
try:
    top_loser_df = scrape_yahoo("losers")
    # BUG FIX: the sort key was "data/percent_change" (a CSV path pasted into
    # the column name), which raised a KeyError and always forced the CSV
    # fallback -- see the "Error occurred while scraping losers" output.
    top_loser_df = top_loser_df.sort_values("percent_change", ascending=False)
    time.sleep(5)
except Exception as e:
    print(f"Error occurred while scraping losers: {e}")
    top_loser_df = pd.read_csv("data/top_loser.csv")
try:
    most_active_df = scrape_yahoo("most-active")
    most_active_df = most_active_df.sort_values("volume", ascending=True)
except Exception as e:
    print(f"Error occurred while scraping most active stocks: {e}")
    most_active_df = pd.read_csv("data/most_active.csv")
# Timestamp shown on the plots so users know when the data was scraped
last_updated = datetime.now()
formatted_dt = last_updated.strftime('%Y-%m-%d %H:%M:%S')
Error occurred while scraping losers: 'data/percent_change'
# display(top_gainer_df)
# display(top_loser_df)
# display(most_active_df)
# display(most_active_df)
Looks great..We now have everything we need for our right most pane.
Now we can plot the bar charts and create our right pane
# Horizontal bar charts for the right-hand panel; shared_axes=False keeps each
# chart's axes independent of the other hvplot figures on the dashboard.
gainer_plot = top_gainer_df.hvplot.barh(
    x="ticker",
    y="percent_change",
    xlabel="Company",
    ylabel="Percentage Change",
    hover_cols=['company_name', 'price', 'volume'],
    height=380, color="green",
    title=f"Top Gainers (%) - (Updated {formatted_dt})"
).opts(
    shared_axes=False
)
loser_plot = top_loser_df.hvplot.barh(
    x="ticker",
    y="percent_change",
    xlabel="Company",
    ylabel="Percentage Change",
    hover_cols=['company_name', 'price', 'volume'],
    height=380,
    color="red",
    title=f"Top Losers (%)"
).opts(
    shared_axes=False
)
most_active_plot = most_active_df.hvplot.barh(
    x="ticker",
    y="volume",
    xlabel="Company",
    ylabel="Volume",
    hover_cols=['company_name', 'price', 'volume'],
    height=380,
    color="blue",
    title=f"Most Actively Traded"
).opts(
    shared_axes=False
)
# Stack the three charts vertically for the right side of the dashboard
right_panel = pn.Column(gainer_plot, loser_plot, most_active_plot)
right_panel.servable()
Left Panel - Options Table and Market Sector TreeMap
I want to make a heatmap of the options chain so lets download that data and start looking at it. I want to display all options that will expire within the next month. Options expire on Fridays so we need to ensure our code only gets Fridays for the next four weeks.
We need to define functions to get the calls and puts options data and merge it for our table. We want the next month of data so we need to get options expiring every friday for the next month.
def get_next_friday(start_date):
    """Return the first Friday on or after ``start_date``.

    If ``start_date`` already falls on a Friday, it is returned unchanged.
    """
    # weekday(): Monday == 0 ... Friday == 4; Python's % keeps this non-negative.
    days_ahead = (4 - start_date.weekday()) % 7
    return start_date + timedelta(days=days_ahead)
def get_options_chains(company_symbol):
    """Collect call and put option chains for the next four Friday expirations.

    Parameters
    ----------
    company_symbol : str
        Ticker symbol to look up.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        Concatenated calls and puts frames, each with an ``expiration`` column;
        a pair of empty frames if every expiration lookup fails.
    """
    ticker = yf.Ticker(company_symbol)
    start_date = datetime.now()
    next_friday = get_next_friday(start_date)
    all_calls = []
    all_puts = []
    for _ in range(4):  # Next four Fridays
        formatted_options_dt = next_friday.strftime("%Y-%m-%d")
        try:
            options = ticker.option_chain(formatted_options_dt)
            calls_df = options.calls
            puts_df = options.puts
            # Add expiration date column BEFORE appending -- the previous order
            # only worked because append stored a reference that happened to be
            # mutated afterwards.
            calls_df["expiration"] = formatted_options_dt
            puts_df["expiration"] = formatted_options_dt
            all_calls.append(calls_df)
            all_puts.append(puts_df)
        except Exception as e:
            print(f"Error fetching options for {formatted_options_dt}: {e}")
        next_friday += timedelta(days=7)  # Move to the next Friday
    # ROBUSTNESS: pd.concat([]) raises ValueError; return empty frames instead
    # so the dashboard degrades gracefully when no expirations are available.
    if not all_calls:
        return pd.DataFrame(), pd.DataFrame()
    # Concatenate all dataframes
    all_calls_df = pd.concat(all_calls, ignore_index=True)
    all_puts_df = pd.concat(all_puts, ignore_index=True)
    return all_calls_df, all_puts_df
# calls_df, puts_df = get_options_chains("")
# calls_df.head()
Looks good lets make our table.
Now to format. We want a table that contains: Expiration, calls(bid, ask) strike, puts(bid, ask)
def get_options_table(calls_df, puts_df):
    """Join calls and puts on expiration/strike and wrap them in a Tabulator widget."""
    combined = pd.merge(calls_df, puts_df, on=["expiration", "strike"], suffixes=('_call', '_put'))
    combined.columns = [name.capitalize() for name in combined.columns]  # capitalize columns
    combined = combined.set_index("Expiration")  # Expiration as the row index
    display_cols = ["Bid_call", "Ask_call", "Strike", "Bid_put", "Ask_put"]
    return pn.widgets.Tabulator(
        combined[display_cols],
        pagination="remote",
        page_size=17,
        theme="default",
    )
def reactive_options_chains(company_symbol):
    """Build the options table for the entered symbol (default ticker when blank)."""
    symbol = company_symbol or default_company_name  # "" (or None) falls back to default
    calls_df, puts_df = get_options_chains(symbol)
    return get_options_table(calls_df, puts_df)
# Link the input to the reactive function so the table refreshes per symbol
reactive_table = pn.bind(reactive_options_chains, company_symbol=text_input)
title = pn.pane.Markdown("### Options Chain")
options_panel = pn.Column(title, reactive_table, align="center")
options_panel.servable()
We want to create a treemap of the top sectors and their movements. To do this we will pull the data from trading view sectors page. Yahoos rendered in JavaScript and trying to extract with selenium was too complex. I have read their robots.txt file and confirmed that scraping this page does not violate their terms of service.
def scrape_tradingview_sectors():
    """Scrape the TradingView US sector overview table.

    Returns
    -------
    pd.DataFrame
        One row per sector; market_cap, div_yield, day_change, volume,
        industries and stocks converted to floats.
    """
    # URL of Tradingview page
    url = "https://www.tradingview.com/markets/stocks-usa/sectorandindustry-sector/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find table that contains the stock data
    table = soup.find('table', class_="table-Ngq2xrcG")
    columns = ["sector", "market_cap", "div_yield", "day_change", "volume", "industries", "stocks"]
    df = pd.DataFrame(columns=columns)
    # BUG FIX: row cells previously reused (shadowed) the `columns` name list,
    # so the ASCII-cleanup loop below iterated over <td> tags instead of the
    # column names. The cells now use a distinct name.
    for row in table.find_all('tr')[1:]:  # Skip header row
        cells = row.find_all('td')
        row_data = {
            "sector": cells[0].text,
            "market_cap": cells[1].text,
            "div_yield": cells[2].text,
            "day_change": cells[3].text,
            "volume": cells[4].text,
            "industries": cells[5].text,
            "stocks": cells[6].text
        }
        new_row = pd.DataFrame(row_data, index=[0])
        df = pd.concat([df, new_row], ignore_index=True)

    def convert_market_cap_volume(value):
        # Expand T/B/M/K suffixes; pass plain numeric strings through unchanged.
        multipliers = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3}
        if value and value[-1] in multipliers:
            number = float(value[:-1])
            return number * multipliers[value[-1]]
        return float(value)

    # We must remove all non ascii elements to ensure the data is good to convert to floats
    def remove_non_ascii(text):
        return text.encode('ascii', 'ignore').decode('ascii')

    # Normalize the Unicode minus sign BEFORE stripping non-ASCII characters,
    # otherwise negative day_change values would silently lose their sign.
    df['day_change'] = df['day_change'].str.replace(u'\u2212', '-')
    for col in columns:
        df[col] = df[col].apply(remove_non_ascii)
    df["market_cap"] = df["market_cap"].str.replace(" USD", '').apply(convert_market_cap_volume)
    df["volume"] = df["volume"].apply(convert_market_cap_volume)
    df["div_yield"] = df["div_yield"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    df["day_change"] = df["day_change"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    df["industries"] = df["industries"].astype(float)
    df["stocks"] = df["stocks"].astype(float)
    return df
Same as before we will implement error handling to read from a csv if the webpage changes.
# Scrape the sector table, falling back to a cached CSV if the page layout changes
try:
    sector_df = scrape_tradingview_sectors()
except Exception as e:
    print(f"Error occurred while scraping sectors: {e}")
    sector_df = pd.read_csv("data/sector_industry.csv")
# Timestamp shown on the treemap so users know when the data was scraped
last_updated = datetime.now()
formatted_dt = last_updated.strftime('%Y-%m-%d %H:%M:%S')
display(sector_df)
| sector | market_cap | div_yield | day_change | volume | industries | stocks | |
|---|---|---|---|---|---|---|---|
| 0 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 |
| 1 | Communications | 8.644140e+11 | 4.52 | -0.40 | 11772000.0 | 3.0 | 49.0 |
| 2 | Consumer Durables | 1.836000e+12 | 1.08 | -0.57 | 43553000.0 | 8.0 | 160.0 |
| 3 | Consumer Non-Durables | 2.865000e+12 | 2.93 | 0.78 | 5665000.0 | 9.0 | 165.0 |
| 4 | Consumer Services | 2.097000e+12 | 1.15 | 1.06 | 5466000.0 | 10.0 | 240.0 |
| 5 | Distribution Services | 5.662990e+11 | 1.40 | 0.78 | 1947000.0 | 4.0 | 84.0 |
| 6 | Electronic Technology | 8.868000e+12 | 0.91 | 0.64 | 33245000.0 | 9.0 | 347.0 |
| 7 | Energy Minerals | 2.561000e+12 | 5.00 | -0.19 | 7793000.0 | 4.0 | 121.0 |
| 8 | Finance | 9.743000e+12 | 2.59 | 0.44 | 3855000.0 | 14.0 | 1471.0 |
| 9 | Health Services | 1.099000e+12 | 1.12 | 0.97 | 3050000.0 | 4.0 | 94.0 |
| 10 | Health Technology | 5.814000e+12 | 1.65 | 0.38 | 4833000.0 | 5.0 | 1043.0 |
| 11 | Industrial Services | 9.959460e+11 | 3.93 | -0.09 | 4210000.0 | 5.0 | 136.0 |
| 12 | Miscellaneous | 2.069450e+11 | 7.31 | -0.18 | 446591.0 | 2.0 | 3730.0 |
| 13 | Non-Energy Minerals | 1.123000e+12 | 3.18 | -0.24 | 3895000.0 | 6.0 | 171.0 |
| 14 | Process Industries | 1.075000e+12 | 2.18 | 0.58 | 1762000.0 | 8.0 | 174.0 |
| 15 | Producer Manufacturing | 1.982000e+12 | 1.37 | 1.08 | 2189000.0 | 9.0 | 296.0 |
| 16 | Retail Trade | 4.257000e+12 | 0.86 | 0.28 | 22001000.0 | 10.0 | 193.0 |
| 17 | Technology Services | 1.239200e+13 | 0.48 | -0.25 | 17662000.0 | 4.0 | 621.0 |
| 18 | Transportation | 1.224000e+12 | 1.75 | 0.59 | 6066000.0 | 6.0 | 147.0 |
| 19 | Utilities | 1.366000e+12 | 3.68 | 0.36 | 3664000.0 | 4.0 | 113.0 |
# df["All Sectors"] = "All Sectors" # In order to have a single root node
# def plot_treemap():
# # Need to normalize data for the color scale
# day_change_min = df['day_change'].min()
# day_change_max = df['day_change'].max()
# zero_normalized = -day_change_min / (day_change_max - day_change_min)
# # Define a custom color scale
# color_scale = [
# [0, 'darkred'], # dark red for negative values
# [zero_normalized, 'white'], # white at zero
# [1, 'darkgreen'] # dark green for positive values
# ]
# last_updated = datetime.now()
# formatted_dt = last_updated.strftime('%Y-%m-%d %H:%M')
# fig = px.treemap(df, path=["All Sectors", "sector"], values="market_cap",
# color="day_change", color_continuous_scale=color_scale,
# hover_data=["market_cap", "day_change", "volume", "industries", "stocks"],
# width=600, height=600 )
# # paper_bgcolor='black',
# # plot_bgcolor='black'))
# fig.update_traces(root_color="lightgrey")
# fig.update_layout(margin=dict(t=40, l=5, r=5, b=5), title=f"Stock Sectors/Industry Day Change-(Updated-{formatted_dt})")
# return fig
# treemap_plot = plot_treemap()
# treemap_plot
This looks good, but now I want to add the industries inside each of these sectors. We need to modify our scraping algorithm to get the list of industries in each sector. Lets build this now.
def scrape_tradingview_industries():
    """Scrape TradingView's per-sector industry tables for US stocks.

    For each of the 19 sector URL slugs, fetch that sector's "industries"
    page, parse its data table, and collect one row per industry.

    Returns:
        pd.DataFrame with columns sector, industry, market_cap, div_yield,
        day_change, volume, stocks.  market_cap and volume are expanded
        from their T/B/M/K suffixes into plain floats; div_yield and
        day_change are percentage floats; stocks is a float count.
    """
    sectors = ["commercial-services" , "consumer-durables", "consumer-non-durables", "consumer-services",
    "distribution-services", "electronic-technology", "energy-minerals", "finance", "health-services", "health-technology",
    "industrial-services", "miscellaneous", "non-energy-minerals", "process-industries", "producer-manufacturing",
    "retail-trade", "technology-services", "transportation", "utilities"]

    def convert_market_cap_volume(value):
        # Expand strings like "1.2T" / "345.6M" into plain floats.
        multipliers = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3}
        if isinstance(value, str) and value:
            if value[-1] in multipliers:
                return float(value[:-1]) * multipliers[value[-1]]
            try:
                # No suffix (e.g. the "0" we substitute for em-dashes):
                # still return a float so the column stays numeric.
                return float(value)
            except ValueError:
                return value
        # Pass through non-string or malformed values unchanged
        return value

    rows = []
    for sector in sectors:
        time.sleep(3)  # Wait 3 seconds so that the website doesn't block our requests
        # URL of the TradingView industries page for this sector
        url = f"https://www.tradingview.com/markets/stocks-usa/sectorandindustry-sector/{sector}/industries/"
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find the table that contains the industry data
        table = soup.find('table', class_="table-Ngq2xrcG")
        for row in table.find_all('tr')[1:]:  # Skip header row
            cells = row.find_all('td')
            rows.append({
                "sector": sector,
                "industry": cells[0].text,
                "market_cap": cells[1].text,
                "div_yield": cells[2].text,
                "day_change": cells[3].text,
                "volume": cells[4].text,
                "stocks": cells[5].text,
            })

    # Build the frame once from the collected rows.  This avoids the pandas
    # FutureWarning raised by repeatedly concat-ing single-row frames, and
    # is O(n) instead of O(n^2).
    df = pd.DataFrame(rows, columns=["sector", "industry", "market_cap", "div_yield",
                                     "day_change", "volume", "stocks"])
    # Only normalize the numeric columns -- running these substitutions over
    # the sector/industry name columns (as before) risks mangling names.
    for col in ["market_cap", "div_yield", "day_change", "volume", "stocks"]:
        # Unicode minus (U+2212) before a digit -> ASCII minus; em-dash
        # (U+2014, TradingView's null marker) -> "0".
        df[col] = (df[col]
                   .str.replace('\u2212(?=[0-9])', "-", regex=True)
                   .str.replace("\u2014", "0", regex=True))
    # Expand the T/B/M/K suffixes on 'market_cap' and 'volume'
    df["market_cap"] = df["market_cap"].str.replace(" USD", '').apply(convert_market_cap_volume)
    df["volume"] = df["volume"].apply(convert_market_cap_volume)
    # Strip '%' and any other non-numeric characters, then convert to float
    df["div_yield"] = df["div_yield"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    df["day_change"] = df["day_change"].str.replace("[^0-9.-]", "", regex=True).astype(float)
    # Convert stocks to float
    df["stocks"] = df["stocks"].astype(float)
    return df
This next part takes a little bit of time, as we do not want to overload their server and get our IP blocked. We do not violate their terms of service, but we still don't want to cause them any problems.
# Scrape the live industry table; if the request or parsing fails (site
# unreachable, markup changed, IP blocked), fall back to a cached CSV snapshot.
try:
    industry_df = scrape_tradingview_industries()
    display(industry_df)
except Exception as e:
    print(f"Error occurred while scraping sectors: {e}")
    # NOTE(review): the filename "secto_industry.csv" looks misspelled
    # (missing the "r") -- confirm it matches the cached file on disk.
    industry_df = pd.read_csv("secto_industry.csv")
C:\Users\brand\AppData\Local\Temp\ipykernel_20724\3225123608.py:63: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. temp_df = pd.concat([temp_df, new_row], ignore_index=True)
| sector | industry | market_cap | div_yield | day_change | volume | stocks | |
|---|---|---|---|---|---|---|---|
| 0 | commercial-services | Advertising/Marketing Services | 4.534600e+10 | 3.48 | 1.39 | 2343000.0 | 39.0 |
| 1 | commercial-services | Commercial Printing/Forms | 2.222300e+10 | 1.99 | 1.00 | 1363000.0 | 14.0 |
| 2 | commercial-services | Financial Publishing/Services | 2.155210e+11 | 0.84 | 1.53 | 1190000.0 | 3.0 |
| 3 | commercial-services | Miscellaneous Commercial Services | 1.387000e+12 | 0.69 | 0.57 | 3860000.0 | 167.0 |
| 4 | commercial-services | Personnel Services | 3.340800e+10 | 1.75 | 0.68 | 418604.0 | 28.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 121 | transportation | Trucking | 1.590800e+11 | 0.69 | 1.99 | 615471.0 | 20.0 |
| 122 | utilities | Alternative Power Generation | 4.331800e+10 | 4.89 | 0.88 | 1147000.0 | 11.0 |
| 123 | utilities | Electric Utilities | 1.137000e+12 | 3.75 | 0.25 | 2696000.0 | 68.0 |
| 124 | utilities | Gas Distributors | 1.247760e+11 | 3.30 | -1.38 | 524596.0 | 20.0 |
| 125 | utilities | Water Utilities | 5.720800e+10 | 2.29 | -0.41 | 546334.0 | 14.0 |
126 rows × 7 columns
After a lot of debugging to be sure, that warning does not appear to affect our scrape. Now that we have our webscraped data, we can merge the two dataframes and update our treemap.
# We need to make the sector names match before we join our dataframes.
# NOTE: the previous approach -- replace('-', ' ').str.title() -- breaks
# compound sector names: "non-energy-minerals" became "Non Energy Minerals",
# which does not match sector_df's "Non-Energy Minerals" (likewise
# "Consumer Non-Durables"), so those sectors' industries were silently
# dropped by the left merge.  An explicit slug -> display-name map keeps
# every sector aligned.
slug_to_sector = {
    "commercial-services": "Commercial Services",
    "consumer-durables": "Consumer Durables",
    "consumer-non-durables": "Consumer Non-Durables",
    "consumer-services": "Consumer Services",
    "distribution-services": "Distribution Services",
    "electronic-technology": "Electronic Technology",
    "energy-minerals": "Energy Minerals",
    "finance": "Finance",
    "health-services": "Health Services",
    "health-technology": "Health Technology",
    "industrial-services": "Industrial Services",
    "miscellaneous": "Miscellaneous",
    "non-energy-minerals": "Non-Energy Minerals",
    "process-industries": "Process Industries",
    "producer-manufacturing": "Producer Manufacturing",
    "retail-trade": "Retail Trade",
    "technology-services": "Technology Services",
    "transportation": "Transportation",
    "utilities": "Utilities",
}
# Unmapped values (e.g. data loaded from the CSV fallback that already uses
# display names) are passed through unchanged via fillna.
industry_df["sector"] = industry_df["sector"].map(slug_to_sector).fillna(industry_df["sector"])
merged_df = pd.merge(sector_df, industry_df, on='sector', suffixes=('_sector', '_industry'), how="left")
merged_df["All Sectors"] = "All Sectors"  # In order to have a single root node
display(merged_df)
| sector | market_cap_sector | div_yield_sector | day_change_sector | volume_sector | industries | stocks_sector | industry | market_cap_industry | div_yield_industry | day_change_industry | volume_industry | stocks_industry | All Sectors | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 | Advertising/Marketing Services | 4.534600e+10 | 3.48 | 1.39 | 2343000.0 | 39.0 | All Sectors |
| 1 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 | Commercial Printing/Forms | 2.222300e+10 | 1.99 | 1.00 | 1363000.0 | 14.0 | All Sectors |
| 2 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 | Financial Publishing/Services | 2.155210e+11 | 0.84 | 1.53 | 1190000.0 | 3.0 | All Sectors |
| 3 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 | Miscellaneous Commercial Services | 1.387000e+12 | 0.69 | 0.57 | 3860000.0 | 167.0 | All Sectors |
| 4 | Commercial Services | 1.703000e+12 | 0.82 | 0.72 | 3382000.0 | 5.0 | 251.0 | Personnel Services | 3.340800e+10 | 1.75 | 0.68 | 418604.0 | 28.0 | All Sectors |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 109 | Transportation | 1.224000e+12 | 1.75 | 0.59 | 6066000.0 | 6.0 | 147.0 | Trucking | 1.590800e+11 | 0.69 | 1.99 | 615471.0 | 20.0 | All Sectors |
| 110 | Utilities | 1.366000e+12 | 3.68 | 0.36 | 3664000.0 | 4.0 | 113.0 | Alternative Power Generation | 4.331800e+10 | 4.89 | 0.88 | 1147000.0 | 11.0 | All Sectors |
| 111 | Utilities | 1.366000e+12 | 3.68 | 0.36 | 3664000.0 | 4.0 | 113.0 | Electric Utilities | 1.137000e+12 | 3.75 | 0.25 | 2696000.0 | 68.0 | All Sectors |
| 112 | Utilities | 1.366000e+12 | 3.68 | 0.36 | 3664000.0 | 4.0 | 113.0 | Gas Distributors | 1.247760e+11 | 3.30 | -1.38 | 524596.0 | 20.0 | All Sectors |
| 113 | Utilities | 1.366000e+12 | 3.68 | 0.36 | 3664000.0 | 4.0 | 113.0 | Water Utilities | 5.720800e+10 | 2.29 | -0.41 | 546334.0 | 14.0 | All Sectors |
114 rows × 14 columns
Now we can build our treemap data visualization. We will need to normalize the data so that zero is shown in white, with the top gainers shading to green and the top losers to red.
def plot_treemap(df):
    """Return a plotly treemap of industry day-change, sized by market cap.

    Args:
        df: the merged sector/industry frame, with an "All Sectors" root
            column plus the *_sector / *_industry columns from the merge.

    Zero day-change is pinned to white on a custom diverging scale; losses
    shade toward dark red and gains toward dark green.
    """
    day_change_min = df['day_change_industry'].min()
    day_change_max = df['day_change_industry'].max()
    # Normalize zero's position on the [0, 1] color axis.  Guard the
    # degenerate cases: all values equal (division by zero), and all values
    # on one side of zero (the anchor would land outside [0, 1], which
    # plotly rejects) -- clamp into range.
    span = day_change_max - day_change_min
    if span == 0:
        zero_normalized = 0.5
    else:
        zero_normalized = min(max(-day_change_min / span, 0.0), 1.0)
    # Custom diverging color scale with white anchored at zero
    color_scale = [
        [0, 'darkred'],            # dark red for the biggest losses
        [zero_normalized, 'white'],  # white at zero
        [1, 'darkgreen']           # dark green for the biggest gains
    ]
    last_updated = datetime.now()
    formatted_dt = last_updated.strftime('%Y-%m-%d %H:%M')
    fig = px.treemap(df, path=["All Sectors", "sector", "industry"],
                     values="market_cap_industry",   # tile size = industry market cap
                     color="day_change_industry",    # tile color = industry day change
                     color_continuous_scale=color_scale,
                     hover_data=["market_cap_sector", "day_change_sector", "volume_sector", "industries", "stocks_sector"],
                     width=600, height=600)
    fig.update_layout(margin=dict(t=40, l=5, r=5, b=5),
                      title=f"<b>Stock Sectors/Industry Day Change- (Updated {formatted_dt})</b>",
                      coloraxis_colorbar={"title":"Change (%)"},
                      font=dict(family="sans-serif",
                                size=11,
                                color="black"
                                ))
    # Transparent backgrounds so the figure blends into the dashboard theme
    fig.update_layout({"plot_bgcolor": "rgba(0, 0, 0, 0)",
                       "paper_bgcolor": "rgba(0, 0, 0, 0)"}
                      )
    fig.update_traces(root_color="lightgrey")
    return fig
treemap_plot = plot_treemap(merged_df)
treemap_plot
# Assemble the dashboard's left-hand column: the options-chain panel on top
# with the sector/industry treemap beneath it.
left_panel = pn.Column(options_panel, treemap_plot, align="center")
left_panel.servable()
Now we can use a panel template for our dashboard, and start the server.
# Create a FastListTemplate to host the dashboard
template = pn.template.FastListTemplate(title='Predictive Stock Dashboard using LSTM Machine Learning', theme="default")
# template.sidebar.append(pn.pane.Markdown("## Sidebar")) # If sidebar needed
# Lay the three dashboard columns out side by side in the template's main area
template.main.append(pn.Row(left_panel, center_panel, right_panel))
# Register the template as servable and open it in a local browser tab
template.servable()
template.show()
Launching server at http://localhost:65304
<panel.io.server.Server at 0x2069a74b700>